# coding=utf-8
from locale import *
import sys
import datetime
from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import csv


def get_mac_address():
    """Return this machine's node id as a 12-character lowercase hex string.

    The value comes from ``uuid.getnode()``; only its low 48 bits are kept
    and the result is zero-padded to 12 hex digits.
    """
    import uuid

    node_id = uuid.getnode()
    # str.format is used on purpose: the file's star import from ``locale``
    # may shadow the ``format``/``str`` builtins at module level.
    return "{:012x}".format(node_id & 0xFFFFFFFFFFFF)

'''
def validate_mac_address():
    import urllib.request
    f = urllib.request.urlopen('http://amazon-ceping.xunhuanle.com/publicwelcome/getallmacaddress')
    ret_content_bytes = f.read()
    ret_content_str = ret_content_bytes.decode()
    return ret_content_str
'''

def _iter_matching_rows(page_source, key_words, base_url):
    """Yield one tab-separated output line per result item containing *key_words*.

    Each line is ``url<TAB>asin<TAB>stars<TAB>reviews`` followed by a newline.
    Items without a review-count element are skipped; a missing star element
    is reported as ``"Null"``. Nothing is yielded when *key_words* is None.
    """
    from urllib.parse import urljoin

    if key_words is None:
        return

    soup = BeautifulSoup(page_source, "html.parser")
    for item in soup.select('div.s-result-item'):
        # Only keep items whose text contains the status keyword
        # (e.g. an "unavailable" marker typed in by the user).
        if key_words not in item.get_text():
            continue

        review_ele = item.select_one('a.a-link-normal > span.a-size-base')
        if review_ele is None:
            continue

        # Star rating.
        star_ele = item.select_one('span.a-icon-alt')
        star_text = star_ele.get_text() if star_ele is not None else "Null"

        # ASIN and product URL; a missing attribute becomes an empty field
        # instead of crashing the string join.
        asin_text = item.get("data-asin") or ''
        href = item.select_one('a.a-link-normal').get("href") or ''

        # urljoin handles both site-relative and already-absolute hrefs.
        product_url = urljoin(base_url, href)
        yield '\t'.join((product_url, asin_text, star_text, review_ele.get_text())) + '\n'


def run_crawler(url, run_times, key_words, open_file):
    """Crawl paginated search results in Firefox and append matches to a file.

    Starting at *url*, each browser session ("cycle") scrapes
    ``run_times - 1`` result pages, following the ``.a-last a`` next-page
    link between pages. After a cycle the browser is closed and a fresh one
    is started on the next page. Crawling continues until an exception is
    raised, typically the 120 s wait for the next-page link timing out.

    Args:
        url: Search-results URL to start from; its host is used to build
            absolute product URLs.
        run_times: Cycle size; ``run_times - 1`` pages are scraped per
            browser session. Values below 2 scrape nothing, so the function
            returns immediately without launching a browser.
        key_words: Text that must occur in a result item for it to be
            recorded. ``None`` records nothing.
        open_file: Path of the UTF-8 text file that rows are appended to.

    Raises:
        Whatever Selenium raises when the next-page link cannot be found or
        a page cannot be loaded. The browser is always closed first.
    """
    pages_per_cycle = run_times - 1
    if pages_per_cycle < 1:
        # Nothing would be scraped; avoid starting (and leaking) a browser.
        return

    base_url = 'https://' + urlparse(url).netloc
    page_url = url

    # One loop iteration per browser session, instead of self-recursion,
    # so a long crawl does not grow the call stack.
    while True:
        # Do not load images.
        profile = FirefoxProfile()
        profile.set_preference('permissions.default.image', 2)
        # NOTE(review): presumably meant to disable the Flash plugin; the
        # value is the string 'false', not a boolean - confirm intent.
        profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', 'false')
        driver = webdriver.Firefox(profile)
        try:
            driver.get(page_url)
            for page_index in range(pages_per_cycle):
                # Open the output once per page; rows are written as found.
                with open(open_file, 'a', newline='', encoding='utf-8') as out:
                    for row in _iter_matching_rows(driver.page_source, key_words, base_url):
                        print(row + " write to file")
                        out.write(row)

                # until() returns the located element, so no second lookup
                # through the deprecated find_element_by_* API is needed.
                next_link = WebDriverWait(driver, 120).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, '.a-last a')))
                page_url = next_link.get_attribute("href")
                print(page_url)

                if page_index < pages_per_cycle - 1:
                    driver.get(page_url)
        finally:
            # Always release the browser, including on timeout/error.
            driver.quit()
        print('第一轮退出： one circle quit!')


# --- Script entry: show the machine id, collect inputs, start the crawl. ---
mac_address = get_mac_address()
print("Your macaddress is below:")
print(mac_address)
'''
validation_content = validate_mac_address()
if mac_address not in validation_content:
    print("Please submit your unicode '"+mac_address+"' to administrator!!!")
    sys.exit()
'''
# Output file name is timestamped so each run writes to its own file.
file_name = "found_link_"+datetime.datetime.now().strftime('%Y-%m-%d_%H_%M_%S')+".txt"

search_url = input("Input Search URL:")
print("Search url is:", search_url)
parseUrl = urlparse(search_url)
# Kept as a module-level name: run_crawler may read it as a global.
host_domain = parseUrl[1]
print("host domain is:", host_domain)

keyWords = input("Input Unavailable:")
keyWords = keyWords.strip()
print("Unavailable Keywords is:", keyWords)

runTimes = input("Run Times Every Cycle:")
runTimes = runTimes.strip()
print("Run Times Every Cycle is:", runTimes)

try:
    setlocale(LC_NUMERIC, 'English_US')
except Error:
    # 'English_US' is a Windows-style locale name and is rejected on other
    # platforms (Error is locale.Error from the star import). Nothing below
    # depends on the numeric locale, so continuing is safe.
    pass

try:
    run_times_value = int(runTimes)
except ValueError:
    print("Run Times Every Cycle must be an integer, got:", runTimes)
    sys.exit(1)

if run_times_value < 2:
    # run_crawler scrapes run_times - 1 pages per cycle, so anything below 2
    # would do no work at all.
    print("Run Times Every Cycle must be at least 2, got:", runTimes)
    sys.exit(1)

run_crawler(search_url, run_times_value, keyWords, file_name)






